From: Keir Fraser
Date: Tue, 23 Jun 2009 10:14:24 +0000 (+0100)
Subject: vtd: IO NUMA support
X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~13716
X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=bd24e968e730d32637236aa01e88af834a169ff4;p=xen.git

vtd: IO NUMA support

This patch adds VT-d RHSA processing for IO NUMA support. The basic idea
is to parse the ACPI RHSA structures to obtain the mapping from each VT-d
HW unit to its proximity domain. This mapping is then used when allocating
pages for the VT-d HW data structures.

Signed-off-by: Allen Kay
---

diff --git a/xen/drivers/passthrough/vtd/dmar.c b/xen/drivers/passthrough/vtd/dmar.c
index 68881bc121..0912c97ff0 100644
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -42,6 +42,7 @@
 LIST_HEAD(acpi_drhd_units);
 LIST_HEAD(acpi_rmrr_units);
 LIST_HEAD(acpi_atsr_units);
+LIST_HEAD(acpi_rhsa_units);
 
 u8 dmar_host_address_width;
 
@@ -127,6 +128,20 @@ struct acpi_drhd_unit * ioapic_to_drhd(unsigned int apic_id)
     return NULL;
 }
 
+struct acpi_drhd_unit * iommu_to_drhd(struct iommu *iommu)
+{
+    struct acpi_drhd_unit *drhd;
+
+    if ( iommu == NULL )
+        return NULL;
+
+    list_for_each_entry( drhd, &acpi_drhd_units, list )
+        if ( drhd->iommu == iommu )
+            return drhd;
+
+    return NULL;
+}
+
 struct iommu * ioapic_to_iommu(unsigned int apic_id)
 {
     struct acpi_drhd_unit *drhd;
@@ -157,6 +172,9 @@ struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *pdev)
     struct acpi_drhd_unit *include_all = NULL;
     int i;
 
+    if ( pdev == NULL )
+        return NULL;
+
     if (pdev->info.is_extfn) {
         bus = pdev->bus;
         devfn = 0;
@@ -199,6 +217,21 @@ struct acpi_atsr_unit * acpi_find_matched_atsr_unit(u8 bus, u8 devfn)
     return all_ports;
 }
 
+struct acpi_rhsa_unit * drhd_to_rhsa(struct acpi_drhd_unit *drhd)
+{
+    struct acpi_rhsa_unit *rhsa;
+
+    if ( drhd == NULL )
+        return NULL;
+
+    list_for_each_entry ( rhsa, &acpi_rhsa_units, list )
+    {
+        if ( rhsa->address == drhd->address )
+            return rhsa;
+    }
+    return NULL;
+}
+
 /*
  * Count number of devices in device scope. Do not include PCI sub
  * hierarchies.
@@ -453,6 +486,25 @@ acpi_parse_one_atsr(struct acpi_dmar_entry_header *header)
     return ret;
 }
 
+static int __init
+acpi_parse_one_rhsa(struct acpi_dmar_entry_header *header)
+{
+    struct acpi_table_rhsa *rhsa = (struct acpi_table_rhsa *)header;
+    struct acpi_rhsa_unit *rhsau;
+    int ret = 0;
+
+    rhsau = xmalloc(struct acpi_rhsa_unit);
+    if ( !rhsau )
+        return -ENOMEM;
+    memset(rhsau, 0, sizeof(struct acpi_rhsa_unit));
+
+    rhsau->address = rhsa->address;
+    rhsau->domain = rhsa->domain;
+    list_add_tail(&rhsau->list, &acpi_rhsa_units);
+
+    return ret;
+}
+
 static int __init acpi_parse_dmar(struct acpi_table_header *table)
 {
     struct acpi_table_dmar *dmar;
@@ -492,6 +544,10 @@ static int __init acpi_parse_dmar(struct acpi_table_header *table)
             dprintk(XENLOG_INFO VTDPREFIX, "found ACPI_DMAR_ATSR\n");
             ret = acpi_parse_one_atsr(entry_header);
             break;
+        case ACPI_DMAR_RHSA:
+            dprintk(XENLOG_INFO VTDPREFIX, "found ACPI_DMAR_RHSA\n");
+            ret = acpi_parse_one_rhsa(entry_header);
+            break;
         default:
             dprintk(XENLOG_WARNING VTDPREFIX, "Unknown DMAR structure type\n");
             ret = -EINVAL;
diff --git a/xen/drivers/passthrough/vtd/dmar.h b/xen/drivers/passthrough/vtd/dmar.h
index f0bfb541a5..5f78a95f94 100644
--- a/xen/drivers/passthrough/vtd/dmar.h
+++ b/xen/drivers/passthrough/vtd/dmar.h
@@ -69,6 +69,11 @@ struct acpi_atsr_unit {
     u8 all_ports:1;
 };
 
+struct acpi_rhsa_unit {
+    struct list_head list;
+    u64 address;
+    u32 domain;
+};
 #define for_each_drhd_unit(drhd) \
     list_for_each_entry(drhd, &acpi_drhd_units, list)
 
diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h
index dadd5517f1..80e59cba98 100644
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -45,6 +45,8 @@ int iommu_flush_iec_global(struct iommu *iommu);
 int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx);
 struct iommu * ioapic_to_iommu(unsigned int apic_id);
 struct acpi_drhd_unit * ioapic_to_drhd(unsigned int apic_id);
+struct acpi_drhd_unit * iommu_to_drhd(struct iommu *iommu);
+struct acpi_rhsa_unit * drhd_to_rhsa(struct acpi_drhd_unit *drhd);
 void clear_fault_bits(struct iommu *iommu);
 int ats_device(int seg, int bus, int devfn);
 int enable_ats_device(int seg, int bus, int devfn);
diff --git a/xen/drivers/passthrough/vtd/ia64/vtd.c b/xen/drivers/passthrough/vtd/ia64/vtd.c
index 91d6338d58..5158060457 100644
--- a/xen/drivers/passthrough/vtd/ia64/vtd.c
+++ b/xen/drivers/passthrough/vtd/ia64/vtd.c
@@ -46,25 +46,6 @@ void unmap_vtd_domain_page(void *va)
     unmap_domain_page(va);
 }
 
-/* Allocate page table, return its machine address */
-u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages)
-{
-    struct page_info *pg;
-    u64 *vaddr;
-
-    pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
-                             d ? MEMF_node(domain_to_node(d)) : 0);
-    vaddr = map_domain_page(page_to_mfn(pg));
-    if ( !vaddr )
-        return 0;
-    memset(vaddr, 0, PAGE_SIZE * npages);
-
-    iommu_flush_cache_page(vaddr, npages);
-    unmap_domain_page(vaddr);
-
-    return page_to_maddr(pg);
-}
-
 void free_pgtable_maddr(u64 maddr)
 {
     if ( maddr != 0 )
diff --git a/xen/drivers/passthrough/vtd/intremap.c b/xen/drivers/passthrough/vtd/intremap.c
index ef2ca1e5e0..47350732b8 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -614,6 +614,7 @@ void msi_msg_write_remap_rte(
 
 int enable_intremap(struct iommu *iommu)
 {
+    struct acpi_drhd_unit *drhd;
     struct ir_ctrl *ir_ctrl;
     u32 sts, gcmd;
     unsigned long flags;
@@ -623,7 +624,8 @@ int enable_intremap(struct iommu *iommu)
     ir_ctrl = iommu_ir_ctrl(iommu);
     if ( ir_ctrl->iremap_maddr == 0 )
     {
-        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(NULL, 1);
+        drhd = iommu_to_drhd(iommu);
+        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(drhd, 1);
         if ( ir_ctrl->iremap_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index 314cf0afc4..25e5fa9c1a 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -38,6 +38,7 @@
 
 #define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
 
+int nr_iommus;
 static spinlock_t domid_bitmap_lock;    /* protect domain id bitmap */
 static int domid_bitmap_size;           /* domain id bitmap size in bits */
 static unsigned long *domid_bitmap;     /* iommu domain id bitmap */
@@ -136,10 +137,37 @@ void iommu_flush_cache_page(void *addr, unsigned long npages)
     __iommu_flush_cache(addr, PAGE_SIZE_4K * npages);
 }
 
-int nr_iommus;
+/* Allocate page table, return its machine address */
+u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages)
+{
+    struct acpi_rhsa_unit *rhsa;
+    struct page_info *pg;
+    u64 *vaddr;
+
+    rhsa = drhd_to_rhsa(drhd);
+    if ( !rhsa )
+        dprintk(XENLOG_INFO VTDPREFIX,
+                "IOMMU: RHSA == NULL, IO NUMA memory allocation disabled\n");
+
+    pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
+                             rhsa ? rhsa->domain : 0);
+    if ( !pg )
+        return 0;
+    vaddr = map_domain_page(page_to_mfn(pg));
+    if ( !vaddr )
+        return 0;
+    memset(vaddr, 0, PAGE_SIZE * npages);
+
+    iommu_flush_cache_page(vaddr, npages);
+    unmap_domain_page(vaddr);
+
+    return page_to_maddr(pg);
+}
+
 /* context entry handling */
 static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
 {
+    struct acpi_drhd_unit *drhd;
     struct root_entry *root, *root_entries;
     u64 maddr;
 
@@ -148,7 +176,8 @@ static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
     root = &root_entries[bus];
     if ( !root_present(*root) )
     {
-        maddr = alloc_pgtable_maddr(NULL, 1);
+        drhd = iommu_to_drhd(iommu);
+        maddr = alloc_pgtable_maddr(drhd, 1);
         if ( maddr == 0 )
         {
             unmap_vtd_domain_page(root_entries);
@@ -165,6 +194,8 @@ static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
 
 static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
 {
+    struct acpi_drhd_unit *drhd;
+    struct pci_dev *pdev;
     struct hvm_iommu *hd = domain_hvm_iommu(domain);
     int addr_width = agaw_to_width(hd->agaw);
     struct dma_pte *parent, *pte = NULL;
@@ -176,8 +207,12 @@ static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
     addr &= (((u64)1) << addr_width) - 1;
     ASSERT(spin_is_locked(&hd->mapping_lock));
     if ( hd->pgd_maddr == 0 )
-        if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain, 1)) == 0) )
+    {
+        pdev = pci_get_pdev_by_domain(domain, -1, -1);
+        drhd = acpi_find_matched_drhd_unit(pdev);
+        if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(drhd, 1)) == 0) )
             goto out;
+    }
 
     parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
     while ( level > 1 )
@@ -189,9 +224,13 @@ static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
         {
             if ( !alloc )
                 break;
-            maddr = alloc_pgtable_maddr(domain, 1);
+
+            pdev = pci_get_pdev_by_domain(domain, -1, -1);
+            drhd = acpi_find_matched_drhd_unit(pdev);
+            maddr = alloc_pgtable_maddr(drhd, 1);
             if ( !maddr )
                 break;
+
             dma_set_pte_addr(*pte, maddr);
             vaddr = map_vtd_domain_page(maddr);
 
@@ -548,13 +587,18 @@ static void iommu_free_pagetable(u64 pt_maddr, int level)
 
 static int iommu_set_root_entry(struct iommu *iommu)
 {
+    struct acpi_drhd_unit *drhd;
     u32 sts;
     unsigned long flags;
 
     spin_lock(&iommu->lock);
 
     if ( iommu->root_maddr == 0 )
-        iommu->root_maddr = alloc_pgtable_maddr(NULL, 1);
+    {
+        drhd = iommu_to_drhd(iommu);
+        iommu->root_maddr = alloc_pgtable_maddr(drhd, 1);
+    }
+
     if ( iommu->root_maddr == 0 )
     {
         spin_unlock(&iommu->lock);
diff --git a/xen/drivers/passthrough/vtd/qinval.c b/xen/drivers/passthrough/vtd/qinval.c
index c4f542f814..c543aa1a69 100644
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -416,6 +416,7 @@ static int flush_iotlb_qi(
 
 int enable_qinval(struct iommu *iommu)
 {
+    struct acpi_drhd_unit *drhd;
     struct qi_ctrl *qi_ctrl;
     struct iommu_flush *flush;
     u32 sts;
@@ -428,7 +429,8 @@ int enable_qinval(struct iommu *iommu)
 
     if ( qi_ctrl->qinval_maddr == 0 )
     {
-        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL, NUM_QINVAL_PAGES);
+        drhd = iommu_to_drhd(iommu);
+        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(drhd, NUM_QINVAL_PAGES);
         if ( qi_ctrl->qinval_maddr == 0 )
         {
             dprintk(XENLOG_WARNING VTDPREFIX,
diff --git a/xen/drivers/passthrough/vtd/vtd.h b/xen/drivers/passthrough/vtd/vtd.h
index d119117068..18e87bef06 100644
--- a/xen/drivers/passthrough/vtd/vtd.h
+++ b/xen/drivers/passthrough/vtd/vtd.h
@@ -101,7 +101,7 @@ unsigned int get_cache_line_size(void);
 void cacheline_flush(char *);
 void flush_all_cache(void);
 void *map_to_nocache_virt(int nr_iommus, u64 maddr);
-u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages);
+u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages);
 void free_pgtable_maddr(u64 maddr);
 void *map_vtd_domain_page(u64 maddr);
 void unmap_vtd_domain_page(void *va);
diff --git a/xen/drivers/passthrough/vtd/x86/vtd.c b/xen/drivers/passthrough/vtd/x86/vtd.c
index 7c2c80134d..9316cef96f 100644
--- a/xen/drivers/passthrough/vtd/x86/vtd.c
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c
@@ -44,27 +44,6 @@ void unmap_vtd_domain_page(void *va)
     unmap_domain_page(va);
 }
 
-/* Allocate page table, return its machine address */
-u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages)
-{
-    struct page_info *pg;
-    u64 *vaddr;
-    unsigned long mfn;
-
-    pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
-                             d ? MEMF_node(domain_to_node(d)) : 0);
-    if ( !pg )
-        return 0;
-    mfn = page_to_mfn(pg);
-    vaddr = map_domain_page(mfn);
-    memset(vaddr, 0, PAGE_SIZE * npages);
-
-    iommu_flush_cache_page(vaddr, npages);
-    unmap_domain_page(vaddr);
-
-    return (u64)mfn << PAGE_SHIFT_4K;
-}
-
 void free_pgtable_maddr(u64 maddr)
 {
     if ( maddr != 0 )
diff --git a/xen/include/xen/acpi.h b/xen/include/xen/acpi.h
index fc1f8317bb..1023d8106d 100644
--- a/xen/include/xen/acpi.h
+++ b/xen/include/xen/acpi.h
@@ -197,6 +197,7 @@ enum acpi_dmar_entry_type {
	ACPI_DMAR_DRHD = 0,
	ACPI_DMAR_RMRR,
	ACPI_DMAR_ATSR,
+	ACPI_DMAR_RHSA,
	ACPI_DMAR_ENTRY_COUNT
 };
 
@@ -224,6 +225,12 @@ struct acpi_table_atsr {
	u16	segment;
 } __attribute__ ((packed));
 
+struct acpi_table_rhsa {
+	struct	acpi_dmar_entry_header header;
+	u32	domain;
+	u64	address; /* register base address for this drhd */
+} __attribute__ ((packed));
+
 enum acpi_dev_scope_type {
	ACPI_DEV_ENDPOINT=0x01,	/* PCI Endpoing device */
	ACPI_DEV_P2PBRIDGE,	/* PCI-PCI Bridge */
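
As an aside, the lookup chain the patch introduces can be summarised by the
following minimal, self-contained sketch (simplified stand-in types and a toy
RHSA table, not the Xen structures or APIs patched above): each VT-d unit's
DRHD is matched to an RHSA entry by register base address, and the RHSA's
proximity domain becomes the NUMA hint when allocating that unit's page
tables and other data structures.

/* Illustrative sketch only -- simplified types, not the Xen code above. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct rhsa  { uint64_t address; uint32_t domain; }; /* from an ACPI RHSA entry */
struct drhd  { uint64_t address; };                  /* from an ACPI DRHD entry */
struct iommu { const struct drhd *drhd; };           /* one VT-d remapping unit */

/* Match a DRHD to its RHSA by register base address, as the patch does. */
static const struct rhsa *drhd_to_rhsa(const struct drhd *drhd,
                                       const struct rhsa *tbl, size_t n)
{
    size_t i;

    for ( i = 0; i < n; i++ )
        if ( tbl[i].address == drhd->address )
            return &tbl[i];
    return NULL;
}

int main(void)
{
    static const struct rhsa rhsa_tbl[] = {
        { 0xfed90000, 0 },  /* unit at 0xfed90000 -> proximity domain 0 */
        { 0xfed91000, 1 },  /* unit at 0xfed91000 -> proximity domain 1 */
    };
    struct drhd  drhd  = { 0xfed91000 };
    struct iommu iommu = { &drhd };
    const struct rhsa *rhsa;

    rhsa = drhd_to_rhsa(iommu.drhd, rhsa_tbl,
                        sizeof(rhsa_tbl) / sizeof(rhsa_tbl[0]));

    /*
     * The patch passes this domain as the node hint to alloc_domheap_pages();
     * when no RHSA matches, it falls back to 0 and logs that IO NUMA
     * allocation is disabled.
     */
    printf("allocate VT-d tables on node %u\n",
           (unsigned)(rhsa ? rhsa->domain : 0));
    return 0;
}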